Download monthly data from January 2015M1 to 2022M10 to estimate the followings:
First, we will import necessary libraries to chose stocks to invest in by comparing risk & returns. If there are stocks that offers lower returns with higher risk, we are going to exclude those stocks from the portfolio.
#Import Libraries
library(tidyverse, quietly = TRUE)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(quantmod)
## 필요한 패키지를 로딩중입니다: xts
## 필요한 패키지를 로딩중입니다: zoo
##
## 다음의 패키지를 부착합니다: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## 다음의 패키지를 부착합니다: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## 필요한 패키지를 로딩중입니다: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tidyquant)
## 필요한 패키지를 로딩중입니다: lubridate
##
## 다음의 패키지를 부착합니다: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## 필요한 패키지를 로딩중입니다: PerformanceAnalytics
##
## 다음의 패키지를 부착합니다: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## == Need to Learn tidyquant? ====================================================
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(PortfolioAnalytics)
## 필요한 패키지를 로딩중입니다: foreach
##
## 다음의 패키지를 부착합니다: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
library(PerformanceAnalytics)
library(timetk)
library(corrplot)
## corrplot 0.92 loaded
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(plotly)
##
## 다음의 패키지를 부착합니다: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(DT)
library(Metrics)
library(forecast)
##
## 다음의 패키지를 부착합니다: 'forecast'
## The following object is masked from 'package:Metrics':
##
## accuracy
#Stock selection process
#Analyze multiple stocks
#Create symbols
symbols.a <- c("AMD","NVDA","MSFT","KO","PEP","AMZN","AAPL","ET","NFLX","NKE",
"DIS","WMT","ADBE","SPY")
#Download stock data using tidyquant
stocks.prices.a <- tq_get(symbols.a,get = "stock.prices",from = "2015-01-01",
to = "2019-12-31") %>%
group_by(symbol)
stocks.prices.a
## # A tibble: 17,598 x 8
## # Groups: symbol [14]
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AMD 2015-01-02 2.67 2.67 2.67 2.67 0 2.67
## 2 AMD 2015-01-05 2.67 2.7 2.64 2.66 8878200 2.66
## 3 AMD 2015-01-06 2.65 2.66 2.55 2.63 13912500 2.63
## 4 AMD 2015-01-07 2.63 2.65 2.54 2.58 12377600 2.58
## 5 AMD 2015-01-08 2.59 2.65 2.56 2.61 11136600 2.61
## 6 AMD 2015-01-09 2.63 2.64 2.58 2.63 8907600 2.63
## 7 AMD 2015-01-12 2.62 2.64 2.55 2.63 9979600 2.63
## 8 AMD 2015-01-13 2.64 2.68 2.6 2.66 17907400 2.66
## 9 AMD 2015-01-14 2.6 2.66 2.58 2.63 9989900 2.63
## 10 AMD 2015-01-15 2.62 2.65 2.49 2.52 17744000 2.52
## # ... with 17,588 more rows
Tidyquant is a package that imports stock prices from yahoo finance in a tibble format, which helps visualizing the data more easily by using ggplot2 package.
#Compute monthly returns of the multiple stocks
multpl_stock_monthly_returns.a <- stocks.prices.a %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = 'monthly',
col_rename = 'returns')
multpl_stock_monthly_returns.a
## # A tibble: 840 x 3
## # Groups: symbol [14]
## symbol date returns
## <chr> <date> <dbl>
## 1 AMD 2015-01-30 -0.0375
## 2 AMD 2015-02-27 0.210
## 3 AMD 2015-03-31 -0.138
## 4 AMD 2015-04-30 -0.157
## 5 AMD 2015-05-29 0.00885
## 6 AMD 2015-06-30 0.0526
## 7 AMD 2015-07-31 -0.196
## 8 AMD 2015-08-31 -0.0622
## 9 AMD 2015-09-30 -0.0497
## 10 AMD 2015-10-30 0.233
## # ... with 830 more rows
#Computing monthly risk & return for each year
p.a <- multpl_stock_monthly_returns.a%>%
mutate(year=year(date)) %>%
group_by(symbol,year) %>%
summarise(Return = mean(returns),
Risk = sd(returns))
## `summarise()` has grouped output by 'symbol'. You can override using the
## `.groups` argument.
p.a
## # A tibble: 70 x 4
## # Groups: symbol [14]
## symbol year Return Risk
## <chr> <dbl> <dbl> <dbl>
## 1 AAPL 2015 0.000113 0.0638
## 2 AAPL 2016 0.0125 0.0752
## 3 AAPL 2017 0.0352 0.0616
## 4 AAPL 2018 0.000159 0.102
## 5 AAPL 2019 0.0560 0.0677
## 6 ADBE 2015 0.0232 0.0522
## 7 ADBE 2016 0.00876 0.0493
## 8 ADBE 2017 0.0469 0.0598
## 9 ADBE 2018 0.0239 0.0723
## 10 ADBE 2019 0.0331 0.0593
## # ... with 60 more rows
#Visualize monthly risk & return for each year
p.1 <- ggplot(p.a, aes(Risk, Return, color = symbol)) +
geom_point(aes(frame = year, ids = symbol), size = 3) +
scale_x_continuous(labels = scales::percent) +
scale_y_continuous(labels = scales::percent) +
theme(text=element_text(size = 16, family="Arial")) +
ggtitle("Risk Return Plot By Calendar Year")
## Warning: Ignoring unknown aesthetics: frame, ids
ggplotly(p.1) %>% animation_opts(1000, easing="elastic", redraw = FALSE)
#Compute 5 year risk & return
p.b <- multpl_stock_monthly_returns.a %>%
group_by(symbol) %>%
summarise(Return = mean(returns),
Risk = sd(returns))
#Visualize 5year risk & return
p.2 <- ggplot(p.b, aes(Risk, Return, color = symbol)) +
geom_point(stat='identity', size = 4, show.legend = TRUE)+
scale_x_continuous(labels = scales::percent) +
scale_y_continuous(labels = scales::percent) +
theme(text=element_text(size = 16, family="Arial")) +
ggtitle("Total Risk Return Plot")
ggplotly(p.2) %>% animation_opts(500, easing='elastic',redraw = FALSE)
Now, we can see the monthly risk & return, as well as the overall risk % return of the stock. By using ggplotly, we can directly see each stock’s performance figure and compare it with different stocks.
From the scatter plot, we can see that some stocks have lower risks but offer higher return. Thus, we will create a portfolio with, PEP, MSFT, AMZN, NVDA, and AMD. Before we go over any deep analysis, we will implement several basic quantitative analysis regarding stocks’ performance
#Create a portoflio with PEP, MSFT, AMZN, NCDA, AMD
symbols <- c('AMD','AMZN','MSFT', 'NVDA', 'PEP')
stocks.prices <- tq_get(symbols,get = "stock.prices",
from = "2015-01-01",
to = "2019-12-31") %>% group_by(symbol)
stocks.prices
## # A tibble: 6,285 x 8
## # Groups: symbol [5]
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AMD 2015-01-02 2.67 2.67 2.67 2.67 0 2.67
## 2 AMD 2015-01-05 2.67 2.7 2.64 2.66 8878200 2.66
## 3 AMD 2015-01-06 2.65 2.66 2.55 2.63 13912500 2.63
## 4 AMD 2015-01-07 2.63 2.65 2.54 2.58 12377600 2.58
## 5 AMD 2015-01-08 2.59 2.65 2.56 2.61 11136600 2.61
## 6 AMD 2015-01-09 2.63 2.64 2.58 2.63 8907600 2.63
## 7 AMD 2015-01-12 2.62 2.64 2.55 2.63 9979600 2.63
## 8 AMD 2015-01-13 2.64 2.68 2.6 2.66 17907400 2.66
## 9 AMD 2015-01-14 2.6 2.66 2.58 2.63 9989900 2.63
## 10 AMD 2015-01-15 2.62 2.65 2.49 2.52 17744000 2.52
## # ... with 6,275 more rows
#Chart stock prices
p.3 <- stocks.prices %>%
ggplot(aes(x = date, y = adjusted, color = symbol)) +
geom_line() +
ylab('Stock Prices') + xlab('Date') +
ggtitle("Price chart for multiple stocks")
ggplotly(p.3)
#Charting stock prices #2
stocks.prices %>%
ggplot(aes(x = date, y = adjusted)) +
geom_line() +
facet_wrap(~symbol, scales = "free_y") + # facet_wrap is used to make diff frames
theme_tq() + # using a new theme
labs(x = "Date", y = "Price") +
ggtitle("Price chart for stocks")
#Computing stock returns for our portfolio
multpl_stock_monthly_returns <- stocks.prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = 'monthly',
col_rename = 'returns')
#Charting stock returns
multpl_stock_monthly_returns %>%
select(symbol,date,returns) %>%
ggplot(aes(x=date,y=returns,col=symbol)) +
scale_y_continuous(labels = scales::percent,
breaks = seq(-0.5,0.75,0.1)) +
geom_bar(stat='identity') +
ggtitle('Monthly Return')+
facet_wrap(~symbol)
#Charting monthly returns using box plot
p.4 <- multpl_stock_monthly_returns %>%
ggplot(aes(symbol, returns)) +
geom_boxplot(aes(color=symbol)) +
labs(x="symbols", y="returns")
ggplotly(p.4)
As we can see from the return graphs, it seems that AMD has the highest volatility while PEP has the lowest volatility. Next, we are going to compute the monthly risk & return.
#Compare monthly mean returns for each stocks
p.5 <- multpl_stock_monthly_returns%>%
mutate(year=year(date)) %>%
group_by(symbol,year) %>%
summarise(mean = mean(returns),
sd = sd(returns)) %>%
ggplot(aes(x = year, y = mean, fill = symbol)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7) +
scale_y_continuous(breaks = seq(-0.1,0.4,0.02),
labels = scales::percent) +
scale_x_continuous(breaks = seq(2012,2022,1)) +
labs(x = "Year", y = "Mean Returns") +
theme_tq() +
theme(legend.position = "top") +
scale_fill_brewer(palette="Paired") + theme_minimal() +
ggtitle("Monthly Mean returns")
## `summarise()` has grouped output by 'symbol'. You can override using the
## `.groups` argument.
ggplotly(p.5)
#Compare monthly risk for each stock
p.6 <- multpl_stock_monthly_returns%>%
mutate(year=year(date)) %>%
group_by(symbol,year) %>%
summarise(mean = mean(returns),
sd = sd(returns)) %>%
ggplot(aes(x = year, y = sd, fill = symbol)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7) +
scale_y_continuous(breaks = seq(-0.1,0.4,0.02),
labels = scales::percent) +
scale_x_continuous(breaks = seq(2012,2022,1)) +
labs(x = "Year", y = "Std Dev") +
theme_tq() +
theme(legend.position = "top") +
scale_fill_brewer(palette="Paired") + theme_minimal() +
ggtitle("Monthly Std Dev")
## `summarise()` has grouped output by 'symbol'. You can override using the
## `.groups` argument.
ggplotly(p.6)
Apparently, AMD has the biggest volatility compared to other stocks while PEP showed the lowest risk among the 5 stocks. However, AMD’s return was significant as the risk was high.
Now, we are going to verify whether the return of the stock is normally distributed and also compute the correlation of each stock.
#Verifying normal distribution
multpl_stock_monthly_returns %>%
ggplot(aes(returns)) +
geom_density(lwd=1) +
geom_histogram(aes(y=..density..))+
facet_wrap(~symbol) + xlab('return')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Although some data looks messy, it seems that the returns are all normally distributed, and we can expect that the return of the portfolio will be noramally distributed too.
#Computing correlation
multpl_stock_monthly_returns %>%
ungroup() %>%
select(date,returns,symbol) %>%
pivot_wider(id_cols = date, names_from = 'symbol', values_from = 'returns') %>%
select(-date) %>%
ggpairs
multpl_stock_monthly_returns %>%
spread(symbol,value=returns) %>%
tk_xts(silent = TRUE) %>%
cor() %>%
corrplot()
From the data, it seems that MSFT and AMZN has the highest correlation of 0.52. Beyond that, all of the stocks have statistically weak positive correlation, which would help diversify the risk of the portfolio.
To construct an efficient frontier and apply MPT concepts, we have to convert the data into xts format first for calculation purpose.
#Compute log daily returns
log_ret_daily <- stocks.prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = 'daily',
col_rename = 'returns',
type = 'log')
head(log_ret_daily)
## # A tibble: 6 x 3
## # Groups: symbol [1]
## symbol date returns
## <chr> <date> <dbl>
## 1 AMD 2015-01-02 0
## 2 AMD 2015-01-05 -0.00375
## 3 AMD 2015-01-06 -0.0113
## 4 AMD 2015-01-07 -0.0192
## 5 AMD 2015-01-08 0.0116
## 6 AMD 2015-01-09 0.00763
#Convert tibble into xts
log_ret_xts <- log_ret_daily %>%
spread(symbol, value = returns) %>%
tk_xts()
## Warning: Non-numeric columns being dropped: date
## Using column `date` for date_var.
head(log_ret_xts)
## AMD AMZN MSFT NVDA PEP
## 2015-01-02 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
## 2015-01-05 -0.003752350 -0.020730712 -0.009238112 -0.017034845 -0.007546600
## 2015-01-06 -0.011342277 -0.023098034 -0.014786255 -0.030787308 -0.007603689
## 2015-01-07 -0.019194447 0.010543966 0.012625209 -0.002609274 0.028821501
## 2015-01-08 0.011560822 0.006812743 0.028994037 0.036927410 0.018011182
## 2015-01-09 0.007633625 -0.011818213 -0.008440650 0.004019935 -0.006793688
Now, the data is converted into time series format. To utilize MPT, we have to create matrix for return and risks
#Calculating mean return for each asset
mean_ret <- colMeans(log_ret_xts)
mean_ret
## AMD AMZN MSFT NVDA PEP
## 0.0022562238 0.0014236052 0.0010523651 0.0019749353 0.0004119731
#Create variance-covariance matrix
cov_mat <- cov(log_ret_xts)*252 #Annualize daily return
cov_mat
## AMD AMZN MSFT NVDA PEP
## AMD 0.39141219 0.051225796 0.03834009 0.11973625 0.012229482
## AMZN 0.05122580 0.083988444 0.04220953 0.05043909 0.009737129
## MSFT 0.03834009 0.042209528 0.05409649 0.04704958 0.012455068
## NVDA 0.11973625 0.050439090 0.04704958 0.17890260 0.010449014
## PEP 0.01222948 0.009737129 0.01245507 0.01044901 0.020950908
Below will be the basic assumptions and formula we will use create efficient frontier and define tangency portfolio
# Calculate the random weights
wts <- runif(n = length(symbols))
wts <- wts/sum(wts) # to normalize the weight into total of 1
# Calculate the portfolio returns
port_returns <- (sum(wts * mean_ret) + 1)^252 - 1
# Calculate the portfolio risk
port_risk <- sqrt(t(wts) %*% (cov_mat %*% wts))
#Define risk free rate
risk_free <- 0.0412
# Calculate the Sharpe Ratio
sharpe_ratio <- (port_returns-risk_free)/port_risk
num_port <- 5000
# Creating a matrix to store the weights
all_wts <- matrix(nrow = num_port,
ncol = length(symbols))
# Creating an empty vector to store portfolio returns
port_returns <- vector('numeric', length = num_port)
# Creating an empty vector to store portfolio standard deviation
port_risk <- vector('numeric', length = num_port)
# Creating an empty vector to store portfolio Sharpe ratio
sharpe_ratio <- vector('numeric', length = num_port)
for (i in seq_along(port_returns)) {
wts <- runif(length(symbols))
wts <- wts/sum(wts)
# Storing weight in the matrix
all_wts[i,] <- wts
# Portfolio returns
port_ret <- sum(wts * mean_ret)
port_ret <- ((port_ret + 1)^252) - 1
# Storing Portfolio Returns values
port_returns[i] <- port_ret
# Creating and storing portfolio risk
port_sd <- sqrt(t(wts) %*% (cov_mat %*% wts))
port_risk[i] <- port_sd
# Creating and storing Portfolio Sharpe Ratios
# Assuming 4.12% Risk free rate
sr <- (port_ret-risk_free)/port_sd
sharpe_ratio[i] <- sr
}
# Storing the values in the table
portfolio_values <- tibble(Return = port_returns,
Risk = port_risk,
SharpeRatio = sharpe_ratio)
# Converting matrix to a tibble and changing column names
all_wts <- tk_tbl(all_wts)
## Warning in tk_tbl.data.frame(as.data.frame(data), preserve_index,
## rename_index, : Warning: No index to preserve. Object otherwise converted to
## tibble successfully.
colnames(all_wts) <- colnames(log_ret_xts)
portfolio_values <- tk_tbl(cbind(all_wts, portfolio_values))
## Warning in tk_tbl.data.frame(cbind(all_wts, portfolio_values)): Warning: No
## index to preserve. Object otherwise converted to tibble successfully.
head(portfolio_values)
## # A tibble: 6 x 8
## AMD AMZN MSFT NVDA PEP Return Risk SharpeRatio
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.237 0.419 0.223 0.0485 0.0725 0.456 0.264 1.57
## 2 0.133 0.0900 0.542 0.0415 0.193 0.340 0.207 1.44
## 3 0.636 0.107 0.0255 0.0817 0.150 0.588 0.431 1.27
## 4 0.253 0.297 0.174 0.134 0.142 0.459 0.265 1.58
## 5 0.459 0.206 0.113 0.200 0.0218 0.594 0.372 1.49
## 6 0.231 0.229 0.240 0.0846 0.216 0.407 0.239 1.53
Now, we will find out the GMV and tangency point portfolio. GMV will be the weights that will minimize variance while tangency portfolio will be the weights that will maximize the Sharpe ratio.
min_var <- portfolio_values[which.min(portfolio_values$Risk),]
max_sr <- portfolio_values[which.max(portfolio_values$SharpeRatio),]
#Compute GMV portoflio weights
GMV <- min_var %>%
gather(AMD:PEP, key = Asset,
value = Weights) %>%
mutate(Asset = as.factor(Asset)) %>%
ggplot(aes(x = fct_reorder(Asset,Weights), y = Weights, fill = Asset)) +
geom_bar(stat = 'identity') +
theme_minimal() +
labs(x = 'Assets', y = 'Weights', title = "Minimum Variance Portfolio Weights") +
scale_y_continuous(labels = scales::percent)
ggplotly(GMV)
We can see that 75% of the stocks are allocated to PEP for GMV portfolio since it has the lowest risk.
Tangency <- max_sr %>%
gather(AMD:PEP, key = Asset,
value = Weights) %>%
mutate(Asset = as.factor(Asset)) %>%
ggplot(aes(x = fct_reorder(Asset,Weights), y = Weights, fill = Asset)) +
geom_bar(stat = 'identity') +
theme_minimal() +
labs(x = 'Assets', y = 'Weights', title = "Tangency Portfolio Weights") +
scale_y_continuous(labels = scales::percent)
ggplotly(Tangency)
For tangency portoflio, we can see that most of the assets are allocated to AMD and NVDA since they both have a higher returns and higher risks. Since the Sharpe ratio = 1.69, risk free rate = 0.0412, the CAL is CAL = 0.0412 + 1.69*sigma(p)
EF <- portfolio_values %>%
ggplot(aes(x = Risk, y = Return, color = SharpeRatio)) +
geom_point() +
theme_classic() +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(labels = scales::percent) +
labs(x = 'Annualized Risk',
y = 'Annualized Returns',
title = "Portfolio Optimization & Efficient Frontier") +
geom_point(aes(x = Risk,
y = Return), data = min_var, color = 'red',size=3) +
geom_point(aes(x = Risk,
y = Return), data = max_sr, color = 'red', size=3) +
annotate('text', x = 0.24, y = 0.60, label = "Tangency Portfolio") +
annotate('text', x = 0.23, y = 0.14, label = "Minimum variance portfolio") +
annotate('text', x = 0.40, y = 0.84, label = "Capital Allocation Line") +
geom_abline(aes(
intercept = 0.0412,
slope = 1.69
))
ggplotly(EF)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
From the analysis, the optimum weight for AMD, AMZN, MSFT, NVDA, and PEP is 13%, 43%, 2%, 41%, and 1%.
Although these weights are going to be used for this project, we are going to add box constraint for minimum allocation of 5% and maximum allocation of 35% when it comes to building portfolio for GSBA 542 project which will be computed in the last page using PerformanceAnalytics package.
First, we will import new stock from 2020M1 to 2022M8 and then allocate each weight for the stocks
#Allocate weights for each stocks
#Import new stock data
wts.tp.a <- c(0.12,0.49,0.08,0.29,0.02)
stocks.prices.b <- tq_get(symbols,get = "stock.prices",from = "2020-01-01",to = "2022-8-31") %>%
group_by(symbol)
head(stocks.prices.b)
## # A tibble: 6 x 8
## # Groups: symbol [1]
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AMD 2020-01-02 46.9 49.2 46.6 49.1 80331100 49.1
## 2 AMD 2020-01-03 48.0 49.4 47.5 48.6 73127400 48.6
## 3 AMD 2020-01-06 48.0 48.9 47.9 48.4 47934900 48.4
## 4 AMD 2020-01-07 49.3 49.4 48.0 48.2 58061400 48.2
## 5 AMD 2020-01-08 47.8 48.3 47.1 47.8 53767000 47.8
## 6 AMD 2020-01-09 48.9 50.0 48.4 49.0 76512800 49.0
#Compute Return for the new stock data
multpl_stock_monthly_returns.b <- stocks.prices.b %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = 'monthly',
col_rename = 'returns')
multpl_stock_monthly_returns.b
## # A tibble: 160 x 3
## # Groups: symbol [5]
## symbol date returns
## <chr> <date> <dbl>
## 1 AMD 2020-01-31 -0.0428
## 2 AMD 2020-02-28 -0.0323
## 3 AMD 2020-03-31 0
## 4 AMD 2020-04-30 0.152
## 5 AMD 2020-05-29 0.0269
## 6 AMD 2020-06-30 -0.0221
## 7 AMD 2020-07-31 0.472
## 8 AMD 2020-08-31 0.173
## 9 AMD 2020-09-30 -0.0972
## 10 AMD 2020-10-30 -0.0817
## # ... with 150 more rows
#Allocate the tangency portfolio weights for each stock
port_ret_tp.b <- multpl_stock_monthly_returns.b %>%
tq_portfolio(assets_col = symbol,
returns_col = returns,
weights = wts.tp.a,
col_rename = 'port_ret_tp',
geometric = FALSE)
## Warning: `spread_()` was deprecated in tidyr 1.2.0.
## Please use `spread()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
port_ret_tp.b
## # A tibble: 32 x 2
## date port_ret_tp
## <date> <dbl>
## 1 2020-01-31 0.0249
## 2 2020-02-28 0.00203
## 3 2020-03-31 0.00640
## 4 2020-04-30 0.194
## 5 2020-05-29 0.0611
## 6 2020-06-30 0.0904
## 7 2020-07-31 0.164
## 8 2020-08-31 0.149
## 9 2020-09-30 -0.0566
## 10 2020-10-30 -0.0524
## # ... with 22 more rows
port_ret_tp.b %>%
mutate(cr = cumprod(1 + port_ret_tp)) %>%
ggplot(aes(x = date, y = cr*100)) +
geom_line() +
labs(x = 'Date',
y = 'Index',
title = 'Tangency Portoflio Index') +
scale_y_continuous(breaks = seq(100,300,50)) +
scale_x_date(date_breaks = 'year',
date_labels = '%Y')
As we can see, the index has increased from 100 to 200, showing 100% rate of return. Now, we will compare this index with the S&P 500.
#Import S&P 500
market.prices <- tq_get("SPY",get = "stock.prices",from = "2020-01-01",to = "2022-08-31")
market_return <- market.prices %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = 'monthly',
col_rename = 'market_ret')
#Chart S&P 500 Index from 100
market_return %>%
mutate(cr = cumprod(1 + market_ret)) %>%
ggplot(aes(x = date, y = cr*100)) +
geom_line() +
labs(x = 'Date',
y = 'Index',
title = 'S&P 500 Index') +
scale_y_continuous(breaks = seq(100,200,50)) +
scale_x_date(date_breaks = 'year',
date_labels = '%Y')
#Chart both indexes in one graph
b <- port_ret_tp.b %>%
mutate(cr = (cumprod(1 + port_ret_tp))*100) %>%
mutate(symbol = 'TP')
b <- subset(b,select = -c(port_ret_tp))
c <- market_return %>%
mutate(cr = (cumprod(1 + market_ret))*100) %>%
mutate(symbol = 'Market')
c <- subset(c,select = -c(market_ret))
bc <- rbind(b,c)
p.7 <- bc %>%
ggplot(aes(x=date,y=cr,color=symbol))+
geom_line() +
xlab('Date')+
ylab('Index')+
ggtitle("Portfolio vs S&P 500")
ggplotly(p.7)
Apparently, the tangency portfolio offered higher return than the market while showing higher volatility recently.
First we will build the Security market line of portfolio. We will use the 10yr bond yield as the risk-free rate.
To build a SML, we need 1. Portfolio Return 2. Market Return 3. Risk-Free Rate
#Portfolio Return
head(port_ret_tp.b)
## # A tibble: 6 x 2
## date port_ret_tp
## <date> <dbl>
## 1 2020-01-31 0.0249
## 2 2020-02-28 0.00203
## 3 2020-03-31 0.00640
## 4 2020-04-30 0.194
## 5 2020-05-29 0.0611
## 6 2020-06-30 0.0904
#Market Return
head(market_return)
## # A tibble: 6 x 2
## date market_ret
## <date> <dbl>
## 1 2020-01-31 -0.00967
## 2 2020-02-28 -0.0792
## 3 2020-03-31 -0.125
## 4 2020-04-30 0.127
## 5 2020-05-29 0.0476
## 6 2020-06-30 0.0177
#Import Risk-Free rate data
getSymbols("^TNX",
src = 'yahoo',
from = "2020-01-01",
to = "2022-08-31",
auto.assign = TRUE,
warnings = FALSE)
## Warning: ^TNX contains missing values. Some functions will not work if objects
## contain missing values in the middle of the series. Consider using na.omit(),
## na.approx(), na.fill(), etc to remove or replace them.
## [1] "^TNX"
head(TNX)
## TNX.Open TNX.High TNX.Low TNX.Close TNX.Volume TNX.Adjusted
## 2020-01-02 1.903 1.903 1.851 1.882 0 1.882
## 2020-01-03 1.828 1.840 1.786 1.788 0 1.788
## 2020-01-05 NA NA NA NA NA NA
## 2020-01-06 1.785 1.816 1.766 1.811 0 1.811
## 2020-01-07 1.797 1.828 1.797 1.827 0 1.827
## 2020-01-08 1.823 1.876 1.802 1.874 0 1.874
Since we are dealing with monthly return data, we are going to change the data structure into monthly data for computation purpose.
#Convert into monthly value
TNX <- TNX %>%
to.monthly()
## Warning in to.period(x, "months", indexAt = indexAt, name = name, ...): missing
## values removed from data
TNX <- TNX[,6]
head(TNX)
## ..Adjusted
## 1 2020 1.520
## 2 2020 1.127
## 3 2020 0.698
## 4 2020 0.622
## 5 2020 0.648
## 6 2020 0.653
#Converting into tibble
TNX <- as.tibble(TNX)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
#Converting risk free rate into percentage and change column name
TNX <- TNX %>%
mutate(rf = ..Adjusted/100)
TNX <- TNX %>%
subset(select = -c(..Adjusted))
TNX
## # A tibble: 32 x 1
## rf
## <dbl>
## 1 0.0152
## 2 0.0113
## 3 0.00698
## 4 0.00622
## 5 0.00648
## 6 0.00653
## 7 0.00536
## 8 0.00693
## 9 0.00677
## 10 0.0086
## # ... with 22 more rows
Now, we are going to create a new tibble that contains all the 3 essential information.
#Merge all the data
SML <- cbind(port_ret_tp.b,market_return,TNX)
#Drop one date column since it's duplicated
SML <- subset(SML,select = -c(date))
SML <- SML %>%
relocate(date)
SML <- as.tibble(SML)
SML
## # A tibble: 32 x 4
## date port_ret_tp market_ret rf
## <date> <dbl> <dbl> <dbl>
## 1 2020-01-31 0.0249 -0.00967 0.0152
## 2 2020-02-28 0.00203 -0.0792 0.0113
## 3 2020-03-31 0.00640 -0.125 0.00698
## 4 2020-04-30 0.194 0.127 0.00622
## 5 2020-05-29 0.0611 0.0476 0.00648
## 6 2020-06-30 0.0904 0.0177 0.00653
## 7 2020-07-31 0.164 0.0589 0.00536
## 8 2020-08-31 0.149 0.0698 0.00693
## 9 2020-09-30 -0.0566 -0.0374 0.00677
## 10 2020-10-30 -0.0524 -0.0249 0.0086
## # ... with 22 more rows
Now, we have to create a new column that computes risk premium which equals to market return - risk free rate
#Compute risk premium
SML <- SML %>%
mutate(risk_premium = market_ret - rf)
SML
## # A tibble: 32 x 5
## date port_ret_tp market_ret rf risk_premium
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2020-01-31 0.0249 -0.00967 0.0152 -0.0249
## 2 2020-02-28 0.00203 -0.0792 0.0113 -0.0904
## 3 2020-03-31 0.00640 -0.125 0.00698 -0.132
## 4 2020-04-30 0.194 0.127 0.00622 0.121
## 5 2020-05-29 0.0611 0.0476 0.00648 0.0412
## 6 2020-06-30 0.0904 0.0177 0.00653 0.0112
## 7 2020-07-31 0.164 0.0589 0.00536 0.0535
## 8 2020-08-31 0.149 0.0698 0.00693 0.0629
## 9 2020-09-30 -0.0566 -0.0374 0.00677 -0.0442
## 10 2020-10-30 -0.0524 -0.0249 0.0086 -0.0335
## # ... with 22 more rows
Remember that Security Market Line : E(R) = Beta*Risk premium + Rf. Since the data is computing monthly time series data, we will use the annualized market return and to avoid using negative figure for computing risk premium. Also, we will use average risk free rate to calculate the expected return of the portfolio.
A_rp <- mean(SML$market_ret+1)^12-1 - mean(SML$rf)
Ex_Beta <- tibble(Beta=seq(0,2,length.out=32), Ex_ret = (A_rp*Beta)/12+mean(SML$rf)/12)
Ex_Beta
## # A tibble: 32 x 2
## Beta Ex_ret
## <dbl> <dbl>
## 1 0 0.00124
## 2 0.0645 0.00179
## 3 0.129 0.00235
## 4 0.194 0.00290
## 5 0.258 0.00345
## 6 0.323 0.00400
## 7 0.387 0.00455
## 8 0.452 0.00510
## 9 0.516 0.00566
## 10 0.581 0.00621
## # ... with 22 more rows
Ex_Beta %>%
ggplot(aes(y = Ex_ret, x = Beta)) +
geom_point(col='cornflowerblue', size=4) +
xlab('Beta') +
ylab('Expected Return') +
geom_point(aes(x =1.25,
y = mean(SML$port_ret_tp)),
color = "red",
size = 4) +
annotate('text',x = 1.45,
y = mean(SML$port_ret_tp),label='Portfolio',size=5)+
annotate('text',x = 1.6, y = 0.01,
label = "E(R) = 0.1*Beta + 0.001",size=5) +
ggtitle('Security Market Line') +
scale_y_continuous(labels = scales::percent)
The Security Market Line shows that our monthly return for the portfolio considering the Beta should be approximately 1.25%. However, our portfolio’s average monthly return is about 2.3% which means that the result is above the theoretical figure.
ggplot(data = SML, aes(y = port_ret_tp, x = risk_premium)) +
geom_point(col='cornflowerblue', size=4) +
xlab('Risk Premium') +
ylab('Expected Return') +
ggtitle('Portfolio Return & Risk Premium') +
geom_smooth(method='lm', color = 'darkviolet', size =0.8) +
annotate('text', x= 0.1,y=-0.1, label = 'Y=1.24X+0.03',size=5)+
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(labels = scales::percent)
## `geom_smooth()` using formula 'y ~ x'
summary(lm(SML$port_ret_tp~SML$risk_premium))
##
## Call:
## lm(formula = SML$port_ret_tp ~ SML$risk_premium)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.130373 -0.043364 -0.005446 0.041403 0.139203
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.03095 0.01234 2.508 0.0178 *
## SML$risk_premium 1.24191 0.20345 6.104 1.04e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06952 on 30 degrees of freedom
## Multiple R-squared: 0.554, Adjusted R-squared: 0.5391
## F-statistic: 37.26 on 1 and 30 DF, p-value: 1.042e-06
From the result above, we can see that the higher the risk premium, the higher the return of the portfolio.
Now, we are going to calculate the beta of our portfolio and compare the relationship between portfolio returns and market returns.
First, we are going to scatter plot the risk & return of the market, portfolio, and each stock to see the overall performance
#Merge market return and individual stock returns
spy_multpl_monthly_return <- market_return %>%
mutate(symbol = 'SPY',
returns = market_ret) %>%
relocate(symbol) %>%
subset(select = -c(market_ret)) %>%
rbind(multpl_stock_monthly_returns.b)
spy_multpl_monthly_return
## # A tibble: 192 x 3
## symbol date returns
## <chr> <date> <dbl>
## 1 SPY 2020-01-31 -0.00967
## 2 SPY 2020-02-28 -0.0792
## 3 SPY 2020-03-31 -0.125
## 4 SPY 2020-04-30 0.127
## 5 SPY 2020-05-29 0.0476
## 6 SPY 2020-06-30 0.0177
## 7 SPY 2020-07-31 0.0589
## 8 SPY 2020-08-31 0.0698
## 9 SPY 2020-09-30 -0.0374
## 10 SPY 2020-10-30 -0.0249
## # ... with 182 more rows
p.8 <- spy_multpl_monthly_return %>%
group_by(symbol) %>%
summarise(expected_return = mean(returns),
stand_dev = sd(returns)) %>%
ggplot(aes(x = stand_dev, y = expected_return, color = symbol)) +
geom_point(size = 4) +
geom_point(aes(x = sd(port_ret_tp.b$port_ret_tp),
y = mean(port_ret_tp.b$port_ret_tp)),
color = "cornflowerblue",
size = 5) +
geom_text(
aes(x = sd(port_ret_tp.b$port_ret_tp) * 1.09,
y = mean(port_ret_tp.b$port_ret_tp),
label = "Portfolio")) +
geom_text(
aes(x= sd(market_return$market_ret) *1.14,
y= mean(market_return$market_ret), label = 'Market'
)) +
ylab("expected return") +
xlab("standard deviation") +
ggtitle("Expected Monthly Returns v. Risk") +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(labels = scales::percent)
ggplotly(p.8)
From the scatter plot, it is apparent that our portfolio has lower risk but higher risk than investing in AMZN due to the asset distribution effect.
Since we saw our portfolio risk & return, we are going to calculate the Beta, which indicates the volatility compared to S&P 500.
Refer that the formula for computing beta is : {beta}_{portfolio} = cov(R_p, R_m)/sigma_m) Or it is the slope of the regression line between market return and portfolio return
#Compute Beta #1
Beta_port <- cov(port_ret_tp.b$port_ret_tp,market_return$market_ret)/
var(market_return$market_ret)
Beta_port
## [1] 1.257211
#Compute Beta #2
summary(lm(SML$port_ret_tp~SML$market_ret))
##
## Call:
## lm(formula = SML$port_ret_tp ~ SML$market_ret)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.144818 -0.050078 -0.009622 0.043862 0.151114
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01228 0.01286 0.955 0.347
## SML$market_ret 1.25721 0.21889 5.743 2.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07184 on 30 degrees of freedom
## Multiple R-squared: 0.5237, Adjusted R-squared: 0.5078
## F-statistic: 32.99 on 1 and 30 DF, p-value: 2.862e-06
We can see that the Beta of the portfolio using both methods give the same figure. The beta of the portfolio 1.25, meaning that it is 25% more volatile than the market.
Now, we are going to draw the regression line for our CAPM model.
p.9 <- port_ret_tp.b %>%
mutate(market_returns = market_return$market_ret) %>%
ggplot(aes(x = market_returns, y = port_ret_tp)) +
geom_point(size=4)+
geom_point(color = "cornflowerblue",size = 3) +
geom_smooth(method = "lm", se = FALSE, color = "red", size = 1) +
ylab("portfolio returns") +
xlab("market returns") +
ggtitle("Regression Line of CAPM") +
annotate('text', x= 0.1,y=-0.1, label = 'Y=1.25X+0.01(t=0.9)',size=5)+
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(labels = scales::percent)
ggplotly(p.9)
## `geom_smooth()` using formula 'y ~ x'
Jensen alpha is the difference between actual return and expected return from the CAPM model
SML$expected_return = SML$rf + SML$risk_premium * Beta_port
SML$Alpha = SML$port_ret_tp - SML$expected_return
datatable(SML)
t.test(SML$Alpha, mu=0.16)
##
## One Sample t-test
##
## data: SML$Alpha
## t = -11.61, df = 31, p-value = 8.082e-13
## alternative hypothesis: true mean is not equal to 0.16
## 95 percent confidence interval:
## -0.009157552 0.041393214
## sample estimates:
## mean of x
## 0.01611783
t.test(SML$risk_premium, mu=-0.13)
##
## One Sample t-test
##
## data: SML$risk_premium
## t = 11.466, df = 31, p-value = 1.11e-12
## alternative hypothesis: true mean is not equal to -0.13
## 95 percent confidence interval:
## -0.02774146 0.01651017
## sample estimates:
## mean of x
## -0.005615645
p.10 <- ggplot(data = SML, aes(x=date, y= Alpha)) +
geom_point(col='cornflowerblue', size=4) +
xlab('Date') +
ylab('Jensen Alpha') +
geom_abline(aes(slope=0,intercept=0),color='red',size=1)+
scale_y_continuous(labels = scales::percent) +
scale_x_date(date_breaks="3 months", date_labels="%Y-%m")+
annotate(geom = 'text',y=0.167,x=as.Date('2020-05-30'),label = 'COVID',color=c('#9900CC'),size=4.5) +
ggtitle('Monthly Jensen Alpha')
ggplotly(p.10)
A.Portoflio 1.CV of portoflio: SIGMA/MEAN
#Calculate CV
CV_p <- sd(port_ret_tp.b$port_ret_tp)/mean(port_ret_tp.b$port_ret_tp)
CV_p
## [1] 4.270936
SR_p <- (mean(port_ret_tp.b$port_ret_tp+1)^(32*12/36)-1-risk_free)/sd(port_ret_tp.b$port_ret_tp)
SR_p
## [1] 2.405553
TR_p <- (mean(port_ret_tp.b$port_ret_tp+1)^(32*12/36)-1-risk_free)/Beta_port
TR_p
## [1] 0.1959266
SO_p <- SortinoRatio(port_ret_tp.b$port_ret_tp)
SO_p
## [,1]
## Sortino Ratio (MAR = 0%) 0.3953987
To compare with the portoflio, we will import ETF that has a similar beta to the portoflio
#Download BlackRock's semiconductor ETF
SOXX <- tq_get('SOXX',get = "stock.prices",from = "2020-01-01",
to = "2022-08-31") %>%
group_by(symbol)
SOXX_return <- SOXX %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = 'monthly',
col_rename = 'returns',
method = 'log')
head(SOXX_return)
## # A tibble: 6 x 3
## # Groups: symbol [1]
## symbol date returns
## <chr> <date> <dbl>
## 1 SOXX 2020-01-31 -0.0516
## 2 SOXX 2020-02-28 -0.0464
## 3 SOXX 2020-03-31 -0.112
## 4 SOXX 2020-04-30 0.145
## 5 SOXX 2020-05-29 0.0716
## 6 SOXX 2020-06-30 0.0778
B. SOXX
1.CV of SOXX: SIGMA/MEAN
CV_SOXX <- sd(SOXX_return$returns)/mean(SOXX_return$returns)
CV_SOXX
## [1] 5.279398
SR_SOXX <- (mean(SOXX_return$returns+1)^(32*12/36)-1-risk_free)/sd(SOXX_return$returns)
SR_SOXX
## [1] 1.699182
Beta_SOXX <- cov(SOXX_return$returns,market_return$market_ret)/
var(market_return$market_ret)
TR_SOXX <- (mean(SOXX_return$returns+1)^(32*12/36)-1-risk_free)/Beta_SOXX
TR_SOXX
## [1] 0.1186559
SO_SOXX <- SortinoRatio(SOXX_return$returns)
SO_SOXX
## [,1]
## Sortino Ratio (MAR = 0%) 0.294809
If we create a tibble and a graph to see the difference more clearly….
P_SOXX <- tibble(
Symbol = c('Portoflio','Portoflio','Portoflio','Portoflio','SOXX','SOXX','SOXX','SOXX'),
Statistics = c('CV','Sharpe','Treynor','Sortino','CV','Sharpe','Treynor','Sortino'),
Values = c(CV_p,SR_p,TR_p,SO_p,CV_SOXX,SR_SOXX,TR_SOXX,SO_SOXX))
P_SOXX
## # A tibble: 8 x 3
## Symbol Statistics Values
## <chr> <chr> <dbl>
## 1 Portoflio CV 4.27
## 2 Portoflio Sharpe 2.41
## 3 Portoflio Treynor 0.196
## 4 Portoflio Sortino 0.395
## 5 SOXX CV 5.28
## 6 SOXX Sharpe 1.70
## 7 SOXX Treynor 0.119
## 8 SOXX Sortino 0.295
P__SOXX_1 <- P_SOXX%>%
ggplot(aes(x = Statistics, y = Values, fill = Symbol)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7) +
labs(x = "Statistics", y = "Values") +
theme_tq() +
theme(legend.position = "top") +
scale_fill_manual(values=c("#E69F00", "#56B4E9"))+
ggtitle("Different Statistics Results")
ggplotly(P__SOXX_1)
Since VaR is calculating the density of the qunatile, we will use qnorm function to compute the 1 month, six months, and one year VaR.
#Setting one month VaR Values
alpha=0.02
mu_onemonths <- mean(port_ret_tp.b$port_ret_tp)
sd_onemonths <- sd(port_ret_tp.b$port_ret_tp)
VaR_Port_one <- qnorm(alpha, mean = mu_onemonths, sd = sd_onemonths)
VaR_Port_one
## [1] -0.1863222
The result implicates that about 2% of the time, the expected return of the portfolio could be worse than -18.6%.
#Setting 6 months VaR Values
mu_sixmonths <- mean(port_ret_tp.b$port_ret_tp)*6
sd_sixmonths <- sd(port_ret_tp.b$port_ret_tp)*sqrt(6)
VaR_Port_six <- qnorm(alpha, mean = mu_sixmonths, sd = sd_sixmonths)
VaR_Port_six
## [1] -0.3712699
#Setting 1 year VaR Values
mu_year <- mean(port_ret_tp.b$port_ret_tp)*12
sd_year <- sd(port_ret_tp.b$port_ret_tp)*sqrt(12)
VaR_Port_year <- qnorm(alpha, mean = mu_year, sd = sd_year)
VaR_Port_year
## [1] -0.4407885
Now we are going to do the same process for the ETF we imported at Q.6.
#Setting one month VaR for SOXX
mu_onemonths_SOXX <- mean(SOXX_return$returns)
sd_onemonths_SOXX <- sd(SOXX_return$returns)
SOXX_Port_one <- qnorm(alpha, mean = mu_onemonths_SOXX, sd = sd_onemonths_SOXX)
SOXX_Port_one
## [1] -0.158152
#Setting six month VaR for SOXX
mu_sixmonths_SOXX <- mean(SOXX_return$returns)*6
sd_sixmonths_SOXX <- sd(SOXX_return$returns)*sqrt(6)
VaR_SOXX_six <- qnorm(alpha, mean = mu_sixmonths_SOXX, sd = sd_sixmonths_SOXX)
VaR_SOXX_six
## [1] -0.3303415
#Setting one year VaR for SOXX
mu_year_SOXX <- mean(SOXX_return$returns)*12
sd_year_SOXX <- sd(SOXX_return$returns)*sqrt(12)
VaR_SOXX_year <- qnorm(alpha, mean = mu_year_SOXX, sd = sd_year_SOXX)
VaR_SOXX_year
## [1] -0.4106983
Since we have the different VaR results for the portoflio and SOXX, we are going to visualize it to see the difference more clearly.
VaR_P_SOXX <- tibble(
Symbol = c('Portoflio','Portoflio','Portoflio','SOXX','SOXX','SOXX'),
Statistics = c('30_VaR','180_VaR','365_VaR','30_VaR','180_VaR','365_VaR'),
Values = abs(c(VaR_Port_one,VaR_Port_six,VaR_Port_year,SOXX_Port_one,VaR_SOXX_six,VaR_SOXX_year)))
VaR_P_SOXX
## # A tibble: 6 x 3
## Symbol Statistics Values
## <chr> <chr> <dbl>
## 1 Portoflio 30_VaR 0.186
## 2 Portoflio 180_VaR 0.371
## 3 Portoflio 365_VaR 0.441
## 4 SOXX 30_VaR 0.158
## 5 SOXX 180_VaR 0.330
## 6 SOXX 365_VaR 0.411
P__SOXX_2 <- VaR_P_SOXX%>%
mutate(Statistics = fct_reorder(Statistics,Values)) %>% # For reoredring data
ggplot(aes(x = Statistics, y = Values, fill = Symbol)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7) +
labs(x = "VaR", y = "Values") +
theme_tq() +
theme(legend.position = "top") +
scale_fill_brewer(palette="Pastel1") + theme_minimal()+
scale_y_continuous(labels = scales::percent)+
ggtitle("VaR: Portoflio vs. SOXX")
ggplotly(P__SOXX_2)
port_ret_tp.b %>%
mutate(market_returns = market_return$market_ret) %>%
ggplot(aes(x = market_returns, y = port_ret_tp)) +
geom_point(size=4)+
geom_point(color = "cornflowerblue",size = 3) +
geom_smooth(method = "lm", se = FALSE, color = "red", size = 1) +
ylab("portfolio returns") +
xlab("market returns") +
ggtitle("Regression Line of CAPM") +
annotate('text', x= 0.1,y=-0.1, label = 'Y=1.25X+0.01(t=0.95)',size=5)+
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(labels = scales::percent)
## `geom_smooth()` using formula 'y ~ x'
summary(lm(SML$port_ret_tp~SML$market_ret))
##
## Call:
## lm(formula = SML$port_ret_tp ~ SML$market_ret)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.144818 -0.050078 -0.009622 0.043862 0.151114
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01228 0.01286 0.955 0.347
## SML$market_ret 1.25721 0.21889 5.743 2.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07184 on 30 degrees of freedom
## Multiple R-squared: 0.5237, Adjusted R-squared: 0.5078
## F-statistic: 32.99 on 1 and 30 DF, p-value: 2.862e-06
x <- SML$risk_premium # This should x axis
y <- SML$port_ret_tp # This should be y axis
data <- data.frame(y,x)
data1 <- data[1:30,]
data2 <- data[31:32,]
reg <- lm(y~x, data= data1)
pred <- predict(reg, newdata = data2, se.fit = TRUE)
Expost_fit <- reg$fitted.values
Expost_fit[31:32]=NA
EXPOST <- tibble(date=SML$date,return=SML$port_ret_tp,symbol='Actual')
EXPOST1 <- tibble(date = SML$date,return = Expost_fit, symbol = 'Ex_Post')
EXPOST2 <- rbind(EXPOST,EXPOST1)
p.11 <- na.omit(EXPOST2) %>%
ggplot(aes(x=date,y=return,color=symbol)) +
geom_line(stat='identity') +
geom_line(size=1.5) +
xlab('Date') +
ylab('Portoflio Return') +
scale_y_continuous(labels = scales::percent) +
scale_x_date(date_breaks="6 months", date_labels="%Y-%m")+
scale_color_manual(values=c("#E69F00", "#56B4E9"))+
ggtitle('Actual vs Ex-Post Forecast')
ggplotly(p.11)
mae_post <- mae(c(EXPOST$return[1:30]),c(na.omit(EXPOST1$return)))*100
mape_post <- mape(c(EXPOST$return[1:30]),c(na.omit(EXPOST1$return)))
rmse_post <- rmse(c(EXPOST$return[1:30]),c(na.omit(EXPOST1$return)))*100
mase_post <- mase(c(EXPOST$return[1:30]),c(na.omit(EXPOST1$return)))*100
Results_post <- tibble(Method = c('EX_POST_F'),
MAE = c(mae_post),
MAPE = c(mape_post),
RMSE = c(rmse_post),
MASE = c(mase_post))
datatable(Results_post)
#Return = Beta * Risk Premium
reg2 <- lm(y~x, data=data)
fore <- forecast(reg2, newdata = data.frame(x=c(0.05,0.11)),se,fit=TRUE)
fore
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1 0.09304497 -0.0006416754 0.1867316 -0.05296500 0.2390549
## 2 0.16755964 0.0700537175 0.2650656 0.01559735 0.3195219
The data above shows the expected return of M9 and M10, respectively 16% and 9%.
#Graph ex-ante forecast
autoplot(fore) +
theme_gray()+
labs(title='Ex-Ante Forecast',x='Risk Premium',y='Portoflio Return')+
scale_y_continuous(labels = scales::percent)+
scale_x_continuous(labels = scales::percent)
#Compute stock return from 2022 M1 to 2022 M10
stocks.prices.c <- tq_get(symbols,get = "stock.prices",from = "2021-01-01",to = "2022-10-31") %>% group_by(symbol)
multpl_stock_monthly_returns.c <- stocks.prices.c %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = 'monthly',
col_rename = 'returns')
port_ret_tp.c <- multpl_stock_monthly_returns.c %>%
tq_portfolio(assets_col = symbol,
returns_col = returns,
weights = wts.tp.a,
col_rename = 'port_ret_tp',
geometric = FALSE)
#Create New Index form 2021 to 2022
Port_Index <- port_ret_tp.c %>%
mutate(Index = cumprod(1 + port_ret_tp)*100,
symbol = 'Portfolio') %>%
subset(select = -c(port_ret_tp))
Port_Index %>%
ggplot(aes(x = date, y = Index)) +
geom_line() +
labs(x = 'Date',
y = 'Index',
title = 'Tangency Portoflio Index from 2021 to 2022') +
scale_y_continuous(breaks = seq(40,140,20))
#Generate naive forecast
naive <- c(NA, Port_Index$Index[-length(Port_Index$Index)])
naive
## [1] NA 99.57905 99.23397 97.96785 108.34630 106.93577 120.61340
## [8] 120.33012 129.23969 121.19533 135.34424 154.01327 144.65458 125.88950
## [15] 128.06949 135.00662 102.21607 102.92346 87.90634 107.83914 97.42861
## [22] 82.81401
Naive_df <- tibble(date=Port_Index$date,
Index = naive,
symbol = 'Naive')
#Plot Naive Forecast
Naive_Port_Index <- rbind(Port_Index,Naive_df)
p.12 <- na.omit(Naive_Port_Index) %>%
ggplot(aes(x=date,y=Index,color=symbol))+
geom_line(size=1.5) +
ggtitle("Actual & Naive Forecast")
ggplotly(p.12)
#Compute measure statistics for naive forecast
mae_naive <- mae(c(Port_Index$Index[-1]),c(na.omit(Naive_df$Index)))
mape_naive <- mape(c(Port_Index$Index[-1]),c(na.omit(Naive_df$Index)))*100
rmse_naive <- rmse(c(Port_Index$Index[-1]),c(na.omit(Naive_df$Index)))
mase_naive <- mase(c(Port_Index$Index[-1]),c(na.omit(Naive_df$Index)))
#Plot MA(15) of the Index
Port_Index %>%
ggplot(aes(x = date, y = Index)) +
geom_line(color='cornflowerblue',size=1) +
labs(x = 'Date',
y = 'Index',
title = 'Tangency Portoflio Index MA(15)') +
scale_y_continuous(breaks = seq(40,140,20)) +
geom_ma(ma_fun = SMA, n = 15,color='red',size = 1.4)
MA_Port <- SMA(Port_Index$Index,15)
Port_Index$Index[15:22]
## [1] 135.00662 102.21607 102.92346 87.90634 107.83914 97.42861 82.81401
## [8] 82.78881
mae_ma15 <- mae(c(Port_Index$Index[15:22]),c(na.omit(MA_Port)))
mape_ma15 <- mape(c(Port_Index$Index[15:22]),c(na.omit(MA_Port)))*100
rmse_ma15 <- rmse(c(Port_Index$Index[15:22]),c(na.omit(MA_Port)))
mase_ma15 <- mase(c(Port_Index$Index[15:22]),c(na.omit(MA_Port)))
#Exponential Smoothing
Port_Index_xts <- Port_Index %>%
tk_xts()
## Warning: Non-numeric columns being dropped: date, symbol
## Using column `date` for date_var.
fit1 <- ses(Port_Index_xts, alpha=0.2, initial="simple", h=3)
fit2 <- ses(Port_Index_xts, alpha=0.6, initial="simple", h=3)
fit3 <- ses(Port_Index_xts, h=3)
autoplot(fit1) +
autolayer(fitted(fit1),size = 1.2, series = 'Alpha = 0.2')+
autolayer(fitted(fit2), size = 1.2,series = 'Alpha = 0.6')+
autolayer(fitted(fit3), size = 1.2,series = 'Alpha = 0.814')+
autolayer(fit1$mean, color = 'red',size = 1.2) +
autolayer(fit2$mean, color = 'green',size =1.2) +
autolayer(fit3$mean, color = 'orange', size = 1.2)+
theme_minimal()+
ylab('Index')
mae_exp <- mae(c(Port_Index$Index),c(fit3[["fitted"]]))
mape_exp <- mape(c(Port_Index$Index),c(fit3[["fitted"]]))*100
rmse_exp <- rmse(c(Port_Index$Index),c(fit3[["fitted"]]))
mase_exp <- mase(c(Port_Index$Index),c(fit3[["fitted"]]))
Results <- tibble(Method = c('Naive','MA(15)','Exponential'),
MAE = c(mae_naive,mae_ma15,mae_exp),
MAPE = c(mape_naive,mape_ma15,mape_exp),
RMSE = c(rmse_naive,rmse_ma15,rmse_exp),
MASE = c(mase_naive,mase_ma15,mase_exp))
datatable(Results)
All of the Statistics tell that Exponential Smoothing method had the lowest error and MA(15) had the highest error measure.
From here, We are going to implement some codes just for fun. First we are going to use the PerformanceAnalytics package to find optimal weights for GMV portfolio. However, we are going to allocate at least 10% of weight to each stock and constraint the maximum weight to 40%.
#Using Performance Analytics library to implement efficient frointer
#Changing data into xts
returns <- multpl_stock_monthly_returns.b %>%
select(symbol,date,returns) %>%
pivot_wider(names_from = symbol, values_from = returns) %>%
tk_xts(silent = TRUE) * 100
returns
## AMD AMZN MSFT NVDA PEP
## 2020-01-31 -4.276982 5.83295154 5.9830766 -1.4505474 4.56485571
## 2020-02-28 -3.234043 -6.22137200 -4.5688194 14.2966381 -7.03422284
## 2020-03-31 0.000000 3.50205707 -2.6541554 -2.3956772 -8.42162702
## 2020-04-30 15.193489 26.89001190 13.6326187 10.8801396 10.14988619
## 2020-05-29 2.691353 -1.27849397 2.5391302 21.4656992 -0.55937996
## 2020-06-30 -2.211892 12.95667241 11.0559190 7.0597169 1.32010872
## 2020-07-31 47.177340 14.71136274 0.7370708 11.7606705 4.08288133
## 2020-08-31 17.293039 9.04610295 10.2752045 25.9992156 1.74341818
## 2020-09-30 -9.722530 -8.75785906 -6.7396834 1.1966521 -0.33083377
## 2020-10-30 -8.171725 -3.57540866 -3.7369780 -7.3648433 -3.83117331
## 2020-11-30 23.070797 4.34398710 6.0060411 6.9211804 8.20767334
## 2020-12-31 -1.025259 2.80583843 3.9005923 -2.5567389 3.55439090
## 2021-01-29 -6.618689 -1.55760121 4.2891924 -0.4998090 -7.90963490
## 2021-02-26 -1.319473 -3.53284141 0.4117907 5.5794025 -5.40382508
## 2021-03-31 -7.111587 0.03717834 1.4588154 -2.6369516 10.36413183
## 2021-04-30 3.974526 12.06627340 6.9601698 12.4454426 1.91588214
## 2021-05-28 -1.886794 -7.04702566 -0.7627354 8.2281277 2.62209074
## 2021-06-30 17.295202 6.73549926 8.4988820 23.1622109 0.88718165
## 2021-07-30 13.052275 -3.27222869 5.1716513 -2.5171827 5.92562692
## 2021-08-31 4.265937 4.30341716 6.1591213 14.8209920 -0.35680013
## 2021-09-30 -7.062860 -5.35181082 -6.6118972 -7.4558805 -3.16455684
## 2021-10-29 16.841594 2.66024586 17.6290959 23.4166745 7.43967770
## 2021-11-30 31.722524 3.99236975 -0.1282171 27.8053712 -1.12623244
## 2021-12-31 -9.136832 -4.92519682 1.7332757 -9.9810210 9.45288143
## 2022-01-31 -20.604583 -10.28299065 -7.5344887 -16.7454326 -0.10937750
## 2022-02-28 7.956233 2.66725185 -3.7211995 -0.4124775 -5.63623348
## 2022-03-31 -11.350738 6.14372847 3.1861710 11.9157041 2.89536276
## 2022-04-29 -21.785257 -23.75250938 -9.9866999 -32.0274032 2.58693237
## 2022-05-31 19.106647 -3.27643208 -1.8077133 0.6739691 -2.30621900
## 2022-06-30 -24.926369 -11.64592120 -5.5320686 -18.7971101 0.04124567
## 2022-07-29 23.538642 27.05959728 9.3096684 19.8166144 4.98020220
## 2022-08-30 -7.970783 -4.60911533 -6.1308723 -14.8378611 -1.12597093
p <- portfolio.spec(colnames(returns))
p.long <- p %>% add.constraint(type='long_only')
p.box.mvp <- p.long %>%
add.constraint(type='full_investment') %>%
add.objective(type='risk',name='StdDev',risk_aversion = 9999) %>%
add.objective(type='return',name='mean') %>%
add.constraint(type='box', min=0.1,max=0.4)
M.box.mvp <- optimize.portfolio(returns, p.box.mvp, optimize_method = 'ROI',
trace = TRUE)
## Registered S3 method overwritten by 'ROI':
## method from
## print.constraint PortfolioAnalytics
Eff <- extractEfficientFrontier(M.box.mvp, match.col = "StdDev", n.portfolios = 50,
risk_aversion = NULL)
## Warning: executing %dopar% sequentially: no parallel backend registered
chart.Weights(M.box.mvp,plot.type = 'barplot')
By utilizing Box constraints, we found out that the weights of the assets are w = (0.1,0.1,0.3,0.1,0.4). Now we are going to compare the performance of the portfolio utilizing the new weights
stocks.prices.c <- tq_get(symbols,get = "stock.prices",from = "2020-01-01",to = "2022-8-31") %>%
group_by(symbol)
multpl_stock_monthly_returns.c <- stocks.prices.c %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = 'monthly',
col_rename = 'returns')
multpl_stock_monthly_returns.c
## # A tibble: 160 x 3
## # Groups: symbol [5]
## symbol date returns
## <chr> <date> <dbl>
## 1 AMD 2020-01-31 -0.0428
## 2 AMD 2020-02-28 -0.0323
## 3 AMD 2020-03-31 0
## 4 AMD 2020-04-30 0.152
## 5 AMD 2020-05-29 0.0269
## 6 AMD 2020-06-30 -0.0221
## 7 AMD 2020-07-31 0.472
## 8 AMD 2020-08-31 0.173
## 9 AMD 2020-09-30 -0.0972
## 10 AMD 2020-10-30 -0.0817
## # ... with 150 more rows
wts.gmv <- c(0.1,0.1,0.3,0.1,0.4)
port_ret_tp.c <- multpl_stock_monthly_returns.c %>%
tq_portfolio(assets_col = symbol,
returns_col = returns,
weights = wts.gmv,
col_rename = 'port_ret_GMV',
geometric = FALSE)
port_ret_tp.c %>%
mutate(cr = cumprod(1 + port_ret_GMV)) %>%
ggplot(aes(x = date, y = cr*100)) +
geom_line() +
labs(x = 'Date',
y = 'Index',
title = 'GMV Portoflio Index') +
scale_y_continuous(breaks = seq(100,300,50)) +
scale_x_date(date_breaks = 'year',
date_labels = '%Y')
market.prices <- tq_get("SPY",get = "stock.prices",from = "2020-01-01",to = "2022-08-31")
market_return <- market.prices %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = 'monthly',
col_rename = 'market_ret')
#Chart S&P 500 Index from 100
market_return %>%
mutate(cr = cumprod(1 + market_ret)) %>%
ggplot(aes(x = date, y = cr*100)) +
geom_line() +
labs(x = 'Date',
y = 'Index',
title = 'S&P 500 Index') +
scale_y_continuous(breaks = seq(100,200,50)) +
scale_x_date(date_breaks = 'year',
date_labels = '%Y')
b.a <- port_ret_tp.c %>%
mutate(cr = (cumprod(1 + port_ret_GMV))*100) %>%
mutate(symbol = 'GMV')
b.a <- subset(b.a,select = -c(port_ret_GMV))
bc.a <- rbind(b.a,c)
p.13 <- bc.a %>%
ggplot(aes(x=date,y=cr,color=symbol))+
geom_line() +
xlab('Date')+
ylab('Index')+
ggtitle("New Portfolio vs S&P 500")
ggplotly(p.13)
Now, we are going to implement a two-sample mean test and varaince analysis to find whether the risk and return of portoflio and S&P 500 is indifferent or not.
axcx <- left_join(market_return,port_ret_tp.c, by='date') %>%
tk_xts()
## Warning: Non-numeric columns being dropped: date
## Using column `date` for date_var.
#ANOVA
var.test(x=market_return$market_ret,y=port_ret_tp.c$port_ret_GMV, conf.level = 0.95)
##
## F test to compare two variances
##
## data: market_return$market_ret and port_ret_tp.c$port_ret_GMV
## F = 0.90667, num df = 31, denom df = 31, p-value = 0.7868
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4425841 1.8573877
## sample estimates:
## ratio of variances
## 0.9066699
t.test(x=market_return$market_ret,y=port_ret_tp.c$port_ret_GMV, conf.level = 0.95)
##
## Welch Two Sample t-test
##
## data: market_return$market_ret and port_ret_tp.c$port_ret_GMV
## t = -0.60034, df = 61.852, p-value = 0.5505
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03927701 0.02113474
## sample estimates:
## mean of x mean of y
## 0.009301853 0.018372988
Unfortunately, there wasn’t any statistical difference in risk & return in investing in two different assets; thus investing in GMV portfolio is statistically same as investing in S&P 500 index
`